
// Auxiliary things: graphs, tables, etc.

// Start from an empty session. The resource settings are wrapped in -capture-
// so the script carries on if a setting is rejected.
clear
clear matrix
foreach setting in "memory 10g" "maxvar 32767" "matsize 11000" {
	capture set `setting'
}

cd ${root}

// prepare.do presumably leaves the working dataset in memory; everything below
// uses its variables directly.
qui do prepare.do

// For every (country, upstream sector) cell, average each share variable
// across downstream sectors. Each entry is "<new variable> <source variable>".
foreach spec in "bsshare fraction" "bslshare logfraction" "bsdshare domfraction" {
	gettoken avgvar sharevar : spec
	bysort countrycode upstream (downstream): egen `avgvar' = mean(`sharevar')
}

// Logs of the averaged total and domestic shares (missing where the average is 0)
gen lbs = log(bsshare)
gen lbsd = log(bsdshare)


// ------------------------------------------------------

// Box plots of the dispersion of I-O shares.
// The averaged shares are constant within (country, upstream), so the
// -if downstream==1- filter keeps one row per cell.

graph drop _all

// ---- all inputs ----
// Options shared by the level and log panels
local canvas scheme(s2mono) xsize(10) ysize(14) bgcolor(white) graphregion(color(white))
local bysector over(upstream, sort(1) label(labsize(vsmall))) nooutside

// levels
graph hbox bsshare if downstream==1, `bysector' `canvas' ///
	ytitle("Average expenditure share on sector") note("") name(level) nodraw
// logs
graph hbox lbs if downstream==1, `bysector' `canvas' ///
	ytitle("Log average expenditure share on sector") note("") name(log) nodraw

// Both panels side by side on a common axis, exported as EPS and PDF
graph combine level log, ycommon scheme(s2mono)  graphregion(color(white))

graph export output/boxplot-dispersion.eps, replace
graph export output/boxplot-dispersion.pdf, replace

// ---- domestic inputs only ----
// These panels are drawn on screen (no -nodraw-) and the combined graph is
// not exported.
// levels
graph hbox bsdshare if downstream==1, ///
	over(upstream, sort(1)) nooutside ///
	title({bf:Cross-country distribution of input shares by upstream sector}, span) ///
	subtitle("Unweighted averages across downstream sectors", span size(small)) ///
	scheme(s2mono) xsize(10) ysize(14) ///
	note("Source: GTAP and author's calculations. Excludes outliers.", span) ///
	bgcolor(white) graphregion(color(white)) name(dlevel)
// logs
graph hbox lbsd if downstream==1, ///
	over(upstream, sort(1)) nooutside ///
	title({bf:Cross-country distribution of input shares by upstream sector}, span) ///
	subtitle("Unweighted averages across downstream sectors", span size(small)) ///
	scheme(s2mono) xsize(10) ysize(14) ///
	note("Source: GTAP and author's calculations. Excludes outliers.", span) ///
	bgcolor(white) graphregion(color(white)) name(dlog)
graph combine dlevel dlog, ycommon scheme(s2mono)  graphregion(color(white))

// ------------------------------------------------------

// Create table with SD of IO shares by sector pair
// (this is for Table 3)
//
// sdio, meanio and coeffvar are computed across countries and are therefore
// constant within an (upstream, downstream) pair. All tabulations below are
// restricted to countrycode==840, presumably so that each pair enters once.

bysort upstream downstream (countrycode): egen sdio = sd(fraction)
bysort upstream downstream (countrycode): egen meanio = mean(fraction)
sum sdio if countrycode==840
sum sdio if countrycode==840 & upstream<=21
sum sdio if countrycode==840 & upstream>21

// Coefficient of variation; missing when meanio is 0 or sdio is missing
gen coeffvar = sdio/meanio
hist coeffvar if countrycode==840
sum coeffvar if countrycode==840
sum coeffvar if countrycode==840 & upstream<=21
sum coeffvar if countrycode==840 & upstream>21

// Frequency table of sdio by bin.
// Stata treats missing as larger than any number, so the open-ended top bin
// must exclude missing values explicitly or they would be counted in it.
count if sdio<0.02 & countrycode==840
count if sdio>=0.02 & sdio<0.04 & countrycode==840
count if sdio>=0.04 & sdio<0.06 & countrycode==840
count if sdio>=0.06 & sdio<0.08 & countrycode==840
count if sdio>=0.08 & sdio<0.1 & countrycode==840
count if sdio>=0.1 & sdio<0.15 & countrycode==840
count if sdio>=0.15 & !missing(sdio) & countrycode==840

// Frequency table of coeffvar by bin (same missing-value guard on the top bin)
count if coeffvar<1 & countrycode==840
count if coeffvar>=1 & coeffvar<2 & countrycode==840
count if coeffvar>=2 & coeffvar<3 & countrycode==840
count if coeffvar>=3 & coeffvar<4 & countrycode==840
count if coeffvar>=4 & coeffvar<6 & countrycode==840
count if coeffvar>=6 & coeffvar<8 & countrycode==840
count if coeffvar>=8 & !missing(coeffvar) & countrycode==840

// ------------------------------------------------------

// Pairwise correlation coefficient of IO tables:
// for every pair of countries, correlate their I-O share vectors (total and
// domestic) across all (upstream, downstream) cells, then plot the
// distribution of these coefficients.

clear
clear matrix
capture set memory 10g
capture set maxvar 32767
capture set matsize 11000

cd ${root}

qui do prepare.do

// Consecutive country index 1..N
egen country = group(countrycode)

// Number of countries in the data; used as the loop bound below so that the
// code does not silently depend on there being exactly 109 countries.
qui summarize country
local ncountries = r(max)

// Lookup table country index -> country codes, used later to attach
// country attributes to each pair. -replace- so that a file left behind by an
// interrupted run does not abort the script.
preserve

duplicates drop country, force
keep country countrycode countrycode_str
save __countries.dta, replace
restore

// One row per sector pair, one column per country
keep country countrycode upstream downstream fraction domfraction
reshape wide fraction domfraction countrycode, i(upstream downstream) j(country)

// Correlation for every unordered country pair (i<j), stored as a constant
// variable named <stub>_i_j
forvalues i=1/`=`ncountries'-1' {
	forvalues j=`=`i'+1'/`ncountries' {
		qui cor fraction`i' fraction`j'
		gen corrcoeff_`i'_`j' = `r(rho)'
		qui cor domfraction`i' domfraction`j'
		gen dcorrcoeff_`i'_`j' = `r(rho)'
	}
}

// The coefficients are constants, so a single row carries all of them
drop if _n!=1
drop fraction* domfraction*

// Long format: one row per suffix "i_j" (string variable ind). The
// countrycode stub also contributes rows with ind = "1".."N" whose
// coefficients are missing; the histograms below ignore them.
qui reshape long corrcoeff_ dcorrcoeff_ countrycode, i(upstream) j(ind) string
drop upstream downstream
hist corrcoeff_ ,  xtitle("Country-pair-wise correlation coefficient") graphregion(color(white)) scheme(s2mono)
graph export output/iotable-pairwise-correlation.pdf, replace

hist dcorrcoeff_ ,  xtitle("Country-pair-wise correlation coefficient") graphregion(color(white)) scheme(s2mono)
graph export output/iotable-d-pairwise-correlation.pdf, replace

save pairwise-correlations.dta, replace

// Attach country codes and World Bank income groups to both members of each
// country pair, then overlay the distribution for OECD-OECD pairs on the
// distribution for all pairs.

clear all
use pairwise-correlations.dta, clear

// ind has the form "i_j": split it at the underscore into two numeric indices
gen sep = strpos(ind,"_")
gen country1 = substr(ind, 1, sep-1)
gen country2 = substr(ind, sep+1, .)
destring country1, replace
destring country2, replace
drop sep

// Country codes for each side of the pair. The lookup file is keyed on
// -country-, so each index is temporarily renamed to that key.
forvalues k = 1/2 {
	rename country`k' country
	merge m:1 country using __countries.dta, keepusing(countrycode countrycode_str)
	drop if _merge==2
	drop _merge
	rename country country`k'
	rename countrycode countrycode`k'
	rename countrycode_string countrycode_string`k'
}

// Income group for each side of the pair. The classification file is keyed
// on iso31661countrychar3code; "ROU" is recoded to "ROM" before matching.
forvalues k = 1/2 {
	rename countrycode_string`k' iso31661countrychar3code
	replace iso31661countrychar3code = "ROM" if iso31661countrychar3code=="ROU"
	merge m:1 iso31661countrychar3code using "data/worldbankclassification-July2013.dta" , keepusing(incomegroup)
	drop if _merge==2
	drop _merge
	rename incomegroup incomegroup`k'
	rename iso31661countrychar3code countrycode_string`k'
}

// Flag countries whose income group contains "High income: OECD"
forvalues k = 1/2 {
	gen isHighIncome`k' = (strpos(incomegroup`k', "High income: OECD")>0)
}

// All pairs vs. pairs in which both countries carry the OECD flag
twoway (hist corrcoeff_ , freq start(0) width(0.05) xtitle("Country-pair-wise correlation coefficient") graphregion(color(white)) mlabel("All countries") ) || ///
    (hist corrcoeff_ if (isHighIncome1==1) & (isHighIncome2==1) , freq start(0) width(0.05)  xtitle("Country-pair-wise correlation coefficient") graphregion(color(white)) fcolor(gs14) lcolor(black) lstyle(solid) mlabel("Within OECD countries")) ///
	 , scheme(s2mono) legend(label(1 "All countries") label(2 "OECD only"))
graph export output/iotable-pairwise-correlation-withoecd.pdf, replace

// Remove the intermediate files; -capture- tolerates their absence
capture erase __countries.dta
capture erase pairwise-correlations.dta

// **********************************************
// Appendix on z_ni
// **********************************************

clear
clear matrix
foreach setting in "memory 10g" "maxvar 32767" "matsize 11000" {
	capture set `setting'
}

cd ${root}

qui do prepare.do

// restrict to country 840
keep if countrycode==840

// summary statistics (for table) and correlation of the two measures
sum n_div_gm n_div_f
cor n_div_gm n_div_f

// Regress each z measure on upstream and on downstream fixed effects.
// Shares of variation explained, as recorded by the author:
//   z_GM (z^(1)): upstream FE 18.5%, downstream FE 8.4%
//   z_F  (z^(2)): upstream FE 18.7%, downstream FE 6.8%
foreach z in n_div_gm n_div_f {
	foreach side in upstream downstream {
		reg `z' i.`side'
	}
}

// **********************************************
// prepare this for exporting to mathematica:
// a wide table with one row per downstream sector and one column per
// (measure, upstream sector)

use litigiosity.dta, clear

// Only the sector identifiers and the two measures are needed
keep downstream upstream n_div_gm n_div_f

// Short identifier names, so that the reshaped columns are named
// n_div_gm<up> and n_div_f<up>
gen down = downstream
gen up = upstream
drop downstream upstream

reshape wide n_div_gm n_div_f , i(down) j(up)

order down n_div_gm* n_div_f*, seq

// now copy+paste to mathematica...

// **********************************************
// Table on most enforcement-intensive sectors

clear
clear matrix
foreach setting in "memory 10g" "maxvar 32767" "matsize 11000" {
	capture set `setting'
}

cd ${root}

qui do prepare.do

// restrict to country 840
keep if countrycode==840

// Scale each measure by 10,000 and average it over downstream sectors
// within each upstream sector
foreach m in gm f {
	gen m_`m' = 10000*n_div_`m'
	bysort upstream (downstream): egen avg_`m' = mean(m_`m')
}

// One row per upstream sector, ordered by the first measure
keep upstream avg_gm avg_f
duplicates drop
sort avg_gm
// now copy+paste this into excel...

// **********************************************
// compare z measures to Rauch/Nunn

clear
clear matrix
foreach setting in "memory 10g" "maxvar 32767" "matsize 11000" {
	capture set `setting'
}

cd ${root}

qui do prepare.do

// restrict to country 840
keep if countrycode==840

// Rauch measures: one minus the con / lib variables
foreach v in con lib {
	gen rauch_`v' = 1-`v'
}

// Nunn is weighted average of Rauch, with weights equal to each input's
// share of the downstream sector's summed fraction
bysort downstream (upstream): egen sumf = total(fraction)
foreach v in con lib {
	gen rwt_`v' = (fraction/sumf)*rauch_`v'
}
foreach v in con lib {
	bysort downstream (upstream): egen nunn_`v' = total(rwt_`v')
}


// Levchenko is one minus the Herfindahl of expenditure shares
// (shares are normalised to add up to one within each downstream sector)
bysort downstream (upstream): egen totalf=total(fraction)
gen isharesq = (fraction/totalf)*(fraction/totalf)
bysort downstream (upstream): egen levchenko_ = total(isharesq)
gen levchenko = 1-levchenko_
drop levchenko_

// Rank correlations between all measures
spearman n_div_gm n_div_f levchenko nunn_con nunn_lib rauch_con rauch_lib

// **********************************************
// Comparison between z, Nunn, Levchenko in the main text

clear
clear matrix
foreach setting in "memory 10g" "maxvar 32767" "matsize 11000" {
	capture set `setting'
}

cd ${root}

qui do prepare.do

// keep one country only
keep if countrycode==840


// Rauch measures: one minus the con / lib variables
foreach v in con lib {
	gen rauch_`v' = 1-`v'
}

// Nunn is weighted average of Rauch
bysort downstream (upstream): egen sumf = total(fraction)
foreach v in con lib {
	gen rwt_`v' = (fraction/sumf)*rauch_`v'
}
foreach v in con lib {
	bysort downstream (upstream): egen nunn_`v' = total(rwt_`v')
}

// merge in Levchenko (the using file is keyed on myagg)
rename downstream myagg
merge m:1 myagg using data/levchenko_by_myagg.dta
rename myagg downstream
drop _merge

// Downstream-level z: median across upstream sectors ...
foreach m in gm f {
	bysort downstream (upstream): egen z_`m' = median(n_div_`m')
}
// ... and the fraction-weighted sum across upstream sectors
foreach m in gm f {
	bysort downstream (upstream): egen z_`m'_wt = total(fraction*n_div_`m')
}

// Collapse to one row per downstream sector
duplicates drop downstream, force

keep downstream z_gm* z_f* nunn_con nunn_lib herf

gen lev = 1-herf

pwcorr z_gm z_f nunn_con nunn_lib lev

// LaTeX labels for the exported tables; "\_" is turned into "_" by the
// substitute() option of esttab below
capture label var z_gm "$\bar{z}\_{n}^{(1)}$"
capture label var z_f "$\bar{z}\_{n}^{(2)}$"
capture label var z_gm_wt "$\tilde{z}\_{n}^{(1)}$"
capture label var z_f_wt "$\tilde{z}\_{n}^{(2)}$"
capture label var nunn_con "$\text{Nunn}^{\text{con}}\_{n}$"
capture label var nunn_lib "$\text{Nunn}^{\text{lib}}\_{n}$"
capture label var lev "$\text{Levchenko}\_{n}$"

// Correlation table using the median-based z measures
estpost correlate  z_gm z_f nunn_con nunn_lib lev , matrix
esttab using output/comparison-table.tex, booktabs not unstack substitute(\_ _) ///
	noobs label replace nonumbers b(2) alignment(l) nostar

// Correlation table using the fraction-weighted z measures
estpost correlate  z_gm_wt z_f_wt nunn_con nunn_lib lev , matrix
esttab using output/comparison-table2.tex, booktabs not unstack substitute(\_ _) ///
	noobs label replace nonumbers b(2) alignment(l) nostar

// **********************************************
// fraction of z explained by upstream/downstream FE
// (AER referee 1, 1.a)

clear
clear matrix
foreach setting in "memory 10g" "maxvar 32767" "matsize 11000" {
	capture set `setting'
}

cd ${root}

qui do prepare.do

// keep one country only
keep if countrycode==840

// Each z measure on upstream dummies, then on downstream dummies; the R-squared
// of each regression is the share of variation explained by that set of FEs
foreach z in n_div_gm n_div_f {
	foreach side in upstream downstream {
		reg `z' i.`side'
	}
}

// ********************
// What fraction of I-O tables is explained by fixed effects?
// ReStat Ref. 1 Q.4

clear
clear matrix
foreach setting in "memory 10g" "maxvar 32767" "matsize 11000" {
	capture set `setting'
}

cd ${root}

qui do prepare.do

// NOTE(review): this snapshot is not read back or erased anywhere in the
// visible part of the script - confirm whether it is still needed.
save __temp.dta, replace

// Constant regressor: the regressions below only serve to report the
// R-squared obtained from the absorbed fixed effects
gen one = 1

// For total and domestic shares in turn: summary statistics, then the
// R-squared with progressively richer sets of fixed effects
foreach share in fraction domfraction {
	sum `share'
	qui areg `share' one, absorb(updown)
	di "`e(r2)'"
	foreach fes in "updown countrycode" "updown upcountry" "updown upcountry downcountry" {
		qui reghdfe `share' one, absorb(`fes')
		di "`e(r2)'"
	}
}

// ********************
// Variation in IO tables due to FDI?
// ReStat Ref. 1 Q.5
//
// Regress standardized I-O shares on the standardized interaction terms, with
// and without flexible controls for (log) inward FDI, on the subsample with
// FDI data. Writes tables/fdicontrol.tex.

clear
clear matrix
capture set memory 10g
capture set maxvar 32767
capture set matsize 11000

cd ${root}

qui do prepare.do

// merge in inward fdi stocks (the using file is keyed on host and myagg)
// NOTE(review): countrycode and downstream are not renamed back afterwards,
// yet -countrycode- is used in absorb() and vce(cluster) below. That only
// resolves if the using file supplies a countrycode variable or through
// variable-name abbreviation - confirm which variable is actually used.
rename countrycode host
rename downstream myagg
merge m:1 host myagg using data/fdimap_myagg.dta
drop if _merge==2
drop _merge

gen logfdi = log(fdi)
gen logfdisq = logfdi * logfdi

gen one = 1
// estimation sample: observations with non-missing log FDI
gen hasfdi = (logfdi != .)

// standardized copies (c_ prefix) of the dependent variable and regressors
center fraction int_n_div_gm int_n_div_f, standardize casewise

// Absorbed terms, defined once so that every column uses the same spelling:
//   fes    - baseline fixed effects
//   fdi_i  - quadratic in log FDI with upstream-specific slopes
//   fdi_ni - quadratic in log FDI with (upstream x downstream)-specific slopes
local fes updown upcountry downcountry countrycode
local fdi_i c.logfdi#i.upstream c.logfdisq#i.upstream
local fdi_ni c.logfdi#i.updown c.logfdisq#i.updown

// run
eststo clear

// (1)-(3): fixed effects only, then adding each FDI polynomial
eststo : quietly reghdfe c_fraction one if hasfdi==1, absorb(`fes') vce(cluster countrycode)
estadd local updownfe "Yes"
estadd local upfe "Yes"
estadd local downfe "Yes"
eststo : quietly reghdfe c_fraction one  if hasfdi==1, absorb(`fdi_i' `fes') vce(cluster countrycode)
estadd local updownfe "Yes"
estadd local upfe "Yes"
estadd local downfe "Yes"
estadd local fdicontrol "Yes"
eststo : quietly reghdfe c_fraction one  if hasfdi==1, absorb(`fdi_ni' `fes') vce(cluster countrycode)
estadd local updownfe "Yes"
estadd local upfe "Yes"
estadd local downfe "Yes"
estadd local fdicontrol2 "Yes"

// (4)-(5): interaction terms, no FDI control
eststo : quietly reghdfe c_fraction c_int_n_div_gm  if hasfdi==1, absorb(`fes') vce(cluster countrycode)
estadd local updownfe "Yes"
estadd local upfe "Yes"
estadd local downfe "Yes"
eststo : quietly reghdfe c_fraction c_int_n_div_f  if hasfdi==1, absorb(`fes') vce(cluster countrycode)
estadd local updownfe "Yes"
estadd local upfe "Yes"
estadd local downfe "Yes"

// (6)-(7): interaction terms with the upstream-specific FDI polynomial
eststo : quietly reghdfe c_fraction c_int_n_div_gm  if hasfdi==1, absorb(`fdi_i' `fes') vce(cluster countrycode)
estadd local updownfe "Yes"
estadd local upfe "Yes"
estadd local downfe "Yes"
estadd local fdicontrol "Yes"
eststo : quietly reghdfe c_fraction c_int_n_div_f  if hasfdi==1, absorb(`fdi_i' `fes') vce(cluster countrycode)
estadd local updownfe "Yes"
estadd local upfe "Yes"
estadd local downfe "Yes"
estadd local fdicontrol "Yes"

// (8)-(9): interaction terms with the pair-specific FDI polynomial.
// Both the linear and the squared term are interacted with i.updown, as in
// column (3) and as the table label "(n,i)-specific polynomial" states; the
// squared term was previously interacted with i.upstream in these two columns.
eststo : quietly reghdfe c_fraction c_int_n_div_gm  if hasfdi==1, absorb(`fdi_ni' `fes') vce(cluster countrycode)
estadd local updownfe "Yes"
estadd local upfe "Yes"
estadd local downfe "Yes"
estadd local fdicontrol2 "Yes"
eststo : quietly reghdfe c_fraction c_int_n_div_f  if hasfdi==1, absorb(`fdi_ni' `fes') vce(cluster countrycode)
estadd local updownfe "Yes"
estadd local upfe "Yes"
estadd local downfe "Yes"
estadd local fdicontrol2 "Yes"

// on-screen overview, then the LaTeX table
esttab, r2 se scalars(F)
qui do labelvariables.do
// forward slash in the path: accepted by Stata on every platform, whereas a
// backslash is Windows-only
esttab using tables/fdicontrol.tex, drop(*one* *cons*) nonotes noisily beta(a2) se(a2) ///
	scalars("fdicontrol \$i\$-specific polynomial in log FDI" "fdicontrol2 \$(n,i)\$-specific polynomial in log FDI" "updownfe \midrule Upstream \$\times\$ Downstream FEs" "upfe Upstream \$\times\$ Country FEs" "downfe Downstream \$\times\$ Country FEs" "r2 \midrule\$R^2\$" "N \$N\$") ///
	obslast label booktabs replace  substitute(\_ _) nomtitles ///
	mgroups("Dependent variable: \$X\_{ni}/X\_n\$", pattern(1 0 0 0 0 0)  prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})  )
drop c_*

// ********************
// Things that are correlated with the z measure
// ReStat Ref. 1 Q.3
//
// Writes two correlation tables: one at the (upstream, downstream) level and
// one at the upstream-sector level.

clear
clear matrix
capture set memory 10g
capture set maxvar 32767
capture set matsize 11000

cd ${root}

// Build data/upstreamness_ij.dta from its CSV source BEFORE it is merged
// below (this conversion used to sit at the end of the section, so a clean
// run failed at the merge). When the CSV is absent, an existing .dta is used
// as is.
capture confirm file data/upstreamness_ij.csv
if !_rc {
	import delimited using data/upstreamness_ij.csv, clear
	rename upstreamness upstreamness_ij
	save data/upstreamness_ij.dta, replace
	clear
}

qui do prepare.do

drop if countrycode !=840

// sector-level upstreamness (the using file is keyed on industry)
rename upstream industry
merge m:1 industry using data/upstreamness_us.dta
assert _merge==3
rename industry upstream
drop _merge

// pair-level upstreamness; every sector pair must match
merge 1:1 upstream downstream using data/upstreamness_ij.dta
assert _merge==3
drop _merge

do labelvariables.do
// NOTE(review): -is- is not created in this section; presumably it comes from
// prepare.do or is an abbreviation of a longer variable name - confirm.
estpost correlate  n_div_gm n_div_f upstreamness_ij is , matrix
// forward slashes in output paths: accepted by Stata on every platform
esttab using tables/other-correlation-table.tex, booktabs not unstack substitute(\_ _) noobs label replace nonumbers b(2) alignment(l) nostar

// ---- upstream-sector level ----
collapse (mean) n_div_gm n_div_f upstreamness , by(upstream)

// Sector characteristics. The first two files are keyed on myagg, the third
// on gtap, so the sector identifier is renamed to the key each file expects.
rename upstream myagg
merge m:1 myagg using "data/cap_intensity/cap_myagg.dta"
drop _merge
merge m:1 myagg using "data/rnd_intensity/rnd_myagg.dta"
drop _merge
rename myagg gtap
merge m:1 gtap using "data/skillintensity_myagg.dta"
drop _merge
rename gtap upstream

do labelvariables.do

estpost correlate  n_div_gm n_div_f upstreamness cap_intensity rnd_intensity fraction_cd, matrix
esttab using tables/other-correlation-table2.tex, booktabs not unstack substitute(\_ _) noobs label replace nonumbers b(2) alignment(l) nostar



